###########################################################
# LATURA (2015) SURVEY EXPERIMENT CODE

rm(list = ls())

# load packages and functions
R_folder <- "INSERT HERE"
source(paste0(R_folder, "survey_functions.R"))

# load data
data_folder <- "INSERT HERE"
load(paste0(data_folder, "latura_childcare.RData"))
print("Complete: loaded data")
Sys.time()

# images directory
images_directory <- "INSERT HERE"

# grayscale for the graphics or not
# print_colormodel <- "srgb" # in color
print_colormodel <- "grey"

# coefficient plot function
coef_plot_function <- function(start_ggplot, file_name, x_title_name,
                               scale_type = "fixed", print_colormodel,
                               main_title_name, factet_yes = TRUE, 
                               out_height, out_width, alpha_values) {
  f <- start_ggplot +geom_vline(xintercept=0, linetype="longdash")+
    geom_errorbarh(aes(xmax =  coef + qnorm(1 - 0.005)*se, 
                       xmin = coef + qnorm(0.005)*se),
                   size=0.6, height=0) +
    geom_errorbarh(aes(xmax =  coef + qnorm(1-0.025)*se, 
                       xmin = coef + qnorm(0.025)*se),
                   size=1.0, height=0)+
    geom_point(stat="identity", fill="white", size = 4)+
    scale_color_manual(name="Vignette Type",
                       values=c("firebrick3","dodgerblue3"))+
    scale_shape_manual(name="Vignette Type",values=c(21,23)) +
    scale_alpha_manual(values = alpha_values, name = "Vignette Type") + 
    theme_bbtop()+ ggtitle(main_title_name) + 
    xlab(x_title_name)+
    ylab("")+scale_y_discrete(breaks=NULL)
  if (factet_yes){
    f <- f + facet_wrap( ~ placebo, ncol=1, scales = scale_type) 
  }
  print(f)
  if (!is.null(images_directory)) {
    ggsave(paste0(images_directory, "/", file_name, ".pdf"), 
           height=out_height, width=out_width, dpi = 300, colormodel = print_colormodel)
  }  
}

# randomization procedure
head(d)
d$V <- ifelse(!is.na(d$ENEControl_1) | !is.na(d$ENETreatm_1) |
                !is.na(d$ENECon_mod_1) | !is.na(d$ENETreamod_1), "ENE", NA)
d$V[!is.na(d$basic.c_1) | !is.na(d$basic.t_1)] <- "Basic"
d$V <- factor(d$V, levels = c("Basic", "ENE"))
d$Z <- ifelse(!is.na(d$ENEControl_1) | !is.na(d$ENECon_mod_1) | 
                !is.na(d$basic.c_1), "Control", NA)
d$Z[!is.na(d$ENETreatm_1) | !is.na(d$ENETreamod_1) |
      !is.na(d$basic.t_1)] <- "Treatment"
d$Z <- factor(d$Z, levels = c("Control", "Treatment"))
d$female <- ifelse(d$Gender == 1, TRUE, FALSE)
d$mod <- ifelse(!is.na(d$ENECon_mod_1) | !is.na(d$ENETreamod_1), TRUE, FALSE)

# placebo outcomes non-standardized
placebo_labs <- c("Benefits Other Than Childcare",
                  "Helps with Family-Work Balance",
                  "Does Not Expect to Answer Emails on Weekends")
nonstd.res <- data.frame(v = rep(levels(d$V), each = 3),
                         placebo = rep(placebo_labs, 2),
                         coef = NA,
                         se = NA)
for (i in 1:2){
  nonstd.res[3*(i-1)+1, 3:4] <- 
    robustse(lm(other.ben_1 ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
  nonstd.res[3*(i-1)+2, 3:4] <- 
    robustse(lm(wkfam.bal_1 ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
  nonstd.res[3*(i-1)+3, 3:4] <- 
    robustse(lm(100-hours_1 ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
}

rename_values <- function(variable, old_names, new_names) {
  for (i in 1:length(new_names)) {
    variable[variable == old_names[i]] <- new_names[i]
  }
  return(variable)
}

# plot density graphs
d$hours_1_rev <- 100 - d$hours_1
plot_data <- dplyr::select(d, Z, V, other.ben_1, wkfam.bal_1, hours_1_rev)[!is.na(d$V),] %>%
  reshape2::melt(measure.vars = c("other.ben_1", "wkfam.bal_1", "hours_1_rev"))
plot_data$variable <- as.character(plot_data$variable)
new_names <- c("Benefits Other Than Childcare",
               "Does Not Expect to Answer Emails on Weekends",
               "Helps with Family-Work Balance")
plot_data$variable <- rename_values(variable = plot_data$variable, 
                                    old_names = unique(plot_data$variable),
                                    new_names = new_names[c(1,3,2)])
plot_data$variable <- factor(plot_data$variable, levels = new_names)


all_combo <- expand.grid(Z = unique(plot_data$Z), 
                         V = unique(plot_data$V),
                         variable = unique(plot_data$variable))

get_density <- function(input_data, input_subset, smkde = TRUE, scale = 1) {
  input_subset <- as.character(unlist(input_subset))
  input_data <- input_data[as.character(input_data$Z) == input_subset[1] &
                             as.character(input_data$V) == input_subset[2] &
                             as.character(input_data$variable) == input_subset[3],]
  if (smkde) {
    xhist <- binning(input_data["value"], breaks=0, bw=1)
    den <- bda::bde(x=xhist, type="smkde", from=0, to=100) 
    temp <- data.frame(Z = as.character(unique(input_data$Z)), 
                       V = as.character(unique(input_data$V)),
                       variable = as.character(unique(input_data$variable)),
                       x = den$x, y = den$y)    
  } else {
    input_x <- unlist(input_data["value"])
    den <- density(input_x[!is.na(input_x)], n=1001, from=0, to=100, bw=scale/2)
    temp <- data.frame(Z = as.character(unique(input_data$Z)), 
                       V = as.character(unique(input_data$V)),
                       variable = as.character(unique(input_data$variable)),
                       x = den$x, y = den$y) 
  }
  return(temp)
}

d_res <- get_density(input_data = plot_data, input_subset = all_combo[1,])
for (i in 2:nrow(all_combo)) {
  d_res <- rbind(d_res, get_density(input_data = plot_data, 
                                    input_subset = all_combo[i,]))
}

# add in means
summary_data <- plot_data %>% group_by(Z, V, variable) %>% 
  dplyr::summarise(mean_value = mean(value, na.rm = TRUE)) %>% as.data.frame()
d_res <- merge(x = d_res, y = summary_data, all.x = TRUE)
# fix factors
d_res$Z <- factor(d_res$Z, levels = levels(plot_data$Z))
d_res$variable <- factor(d_res$variable, levels = levels(plot_data$variable))
d_res$V <- factor(d_res$V, levels = levels(plot_data$V))
# make the density plots
ggplot(d_res, 
       aes(x = x, y = y, fill = Z)) + 
  geom_area(position = "identity", alpha = 0.75, color = "black") + 
  xlim(0, 100) + facet_grid(V ~ variable) + theme_bb() +
  geom_vline(aes(xintercept = mean_value, color = Z), 
             linetype="longdash", size=1) +
  ylab("Density") + 
  xlab("Likelihood on 100 Point Scale") +
  scale_color_manual(name="Treatment Assignment", 
                     values=c("orange3", "steelblue4"),
                     labels = c("No Subsidized Childcare", 
                                "Subsidized Childcare")) + 
  scale_fill_manual(name="Treatment Assignment", 
                    values=c("orange1", "steelblue4"),
                    labels = c("No Subsidized Childcare", 
                               "Subsidized Childcare"))
ggsave(paste0(images_directory, "Figure_35.pdf"), 
       width = 11, height = 5, dpi = 300, colormodel = print_colormodel)  
print("Complete: Density plot")
Sys.time()

nonstd.res$v2 <- factor(as.character(nonstd.res$v), 
                        levels = rev(levels(nonstd.res$v)))
coef_plot_function(start_ggplot = ggplot(data = nonstd.res, 
                                         mapping = aes(x = coef, y = v2,
                                                       shape = v, color = v)),
                   file_name = "Figure_36",
                   x_title_name = "Difference in Likelihood on 100 Point Scale\n(Subsidized Childcare - No Subsidized Childcare)",
                   out_height = 4, out_width = 6, alpha_values = c(1,1,1),
                   main_title_name = NULL, print_colormodel = print_colormodel
)
print("Complete: Non-standardized coefficient plot")
Sys.time()

# standardized placebo coef plot
std.res <- nonstd.res
for (i in 1:2){
  std.res[3*(i-1)+1, 3:4] <- 
    robustse(lm(scale(other.ben_1, T, T) ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
  std.res[3*(i-1)+2, 3:4] <- 
    robustse(lm(scale(wkfam.bal_1, T, T) ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
  std.res[3*(i-1)+3, 3:4] <- 
    robustse(lm(scale(100-hours_1, T, T) ~ Z, data = d[d$V == levels(d$V)[i],]))[2,1:2]
}
coef_plot_function(start_ggplot = ggplot(data = std.res, 
                                         mapping = aes(x = coef, y = v2,
                                                       shape = v, color = v)),
                   file_name = "Figure_05",
                   x_title_name = "Standardized Difference in Likelihood\n(Subsidized Childcare - No Subsidized Childcare)",
                   out_height = 4, out_width = 6, alpha_values = c(1,1,1),
                   main_title_name = NULL, print_colormodel = print_colormodel
)
print("Complete: Standardized coefficient plot")
Sys.time()
